# ----------------------------------------------------------------------
# ' This file processes rx data and produces intermediate files:
# ' opioid prescription files for each quarter.
# ' by BK (bl11@indiana.edu)
# ----------------------------------------------------------------------

load_library = c('bit64','data.table','fst','future.apply','stringr','logger')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))
# Strongly prefer fixed over scientific notation whenever numbers are
# formatted for printing or writing.
options(scipen=999)

# Project root on the shared filesystem; reference tables are read from
# beneath it.
bucket = file.path('/N','project','iuni_doctorshopping')

# read arguments
#   args[[1]]: path of the input .fst file
#   args[[2]]: path of the output file to write
# Any further arguments are ignored.
args = commandArgs(TRUE)
if (length(args) < 2L) {
	# Fail with a usage hint instead of a bare "subscript out of bounds".
	stop("usage: Rscript <script> <infile.fst> <outfile>", call. = FALSE)
}
infile = args[[1]]
outfile = args[[2]]
if (!file.exists(infile)) {
	stop("input file not found: ", infile, call. = FALSE)
}

# Only the columns listed here are read from the input file.
rx_columns = c('PATID','PAT_PLANID',"FILL_DT","PRESCRIPT_ID",
	"DEA","NPI","PRESCRIBER_PROV","PHARM","SPCLT_IND",
	"NDC","PRC_TYP","AHFSCLSS","FORM_IND","FORM_TYP","MAIL_IND",
	"FST_FILL","RFL_NBR","QUANTITY","DAYS_SUP","STRENGTH")

rdata = read_fst(infile, columns=rx_columns, as.data.table=TRUE)

#------------------------------------------------------------------------------
# load drug list
#==============================================================================

# NOTE(review): create_leading_zeros() is not defined in this file, so it must
# already be available in the session when the script runs -- confirm where it
# comes from. From its use here it presumably left-pads NDC codes with zeros
# to 11 characters.

# Full drug table; NDC is the key later used to match prescriptions.
drugs_all = fread(file.path(bucket,"additional_data","drug.tsv")) # use NDC for matching
drugs_all[,NDC := create_leading_zeros(NDC,11)]

# check duplicates
#drugs_all[,.N,by='NDC'][,table(N)]

# Keep only the rows classified as opioids.
drugs_opioid = drugs_all[Class=='Opioid',]

# NOTE(review): drugs_cs is loaded and cleaned here but is not referenced
# again in this script -- confirm whether it is still needed.
drugs_cs = fread(file.path(bucket,'additional_data','Drugs','controlled_substance_NDC.csv'))

# Normalise NDC first and de-duplicate afterwards, so that codes differing
# only in their leading zeros collapse to a single row.
drugs_cs[,NDC := create_leading_zeros(NDC,11)]

# check duplicates
#drugs_cs[,.N,by='NDC'][,table(N)]
drugs_cs = unique(drugs_cs,by='NDC')

# identify drug types here: only include opioid drugs
# Build the lookup that is joined onto the prescriptions. Exact repeated rows
# are collapsed; an NDC that is still repeated afterwards carries conflicting
# strength / conversion values and would silently duplicate prescription rows
# in the join, so stop instead.
opioid_lookup = unique(drugs_opioid[,c("NDC","Strength_Per_Unit","MME_Conversion_Factor")])
if (anyDuplicated(opioid_lookup[["NDC"]]) > 0L) {
	stop("drugs_opioid has conflicting rows for the same NDC; ",
		"resolve them before merging", call. = FALSE)
}

# Inner join: prescriptions whose NDC is not in the opioid list are dropped.
rdata = merge(rdata, opioid_lookup, by='NDC')

# there are some errors in the DAYS_SUP variable (negative supply); treat them as missing
rdata[DAYS_SUP < 0, DAYS_SUP := NA]

# mme = strength per unit * (quantity / days of supply) * conversion factor
rdata[,mme := Strength_Per_Unit * (QUANTITY / DAYS_SUP ) * MME_Conversion_Factor]

# if DAYS_SUP == 0 the division yields Inf / -Inf (or NaN when QUANTITY is
# also 0); treat all of these as missing in a single pass
rdata[!is.finite(mme), mme := NA]

fwrite(rdata, outfile)

